import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
df = pd.read_csv('STRIKE_REPORTS.csv')
C:\Users\KPMal\AppData\Local\Temp\ipykernel_8900\1840193475.py:1: DtypeWarning: Columns (18,21) have mixed types. Specify dtype option on import or set low_memory=False.
df = pd.read_csv('STRIKE_REPORTS.csv')
df
| INDEX_NR | INCIDENT_DATE | INCIDENT_MONTH | INCIDENT_YEAR | TIME | TIME_OF_DAY | AIRPORT_ID | AIRPORT | LATITUDE | LONGITUDE | ... | SIZE | NR_INJURIES | NR_FATALITIES | COMMENTS | REPORTED_NAME | REPORTED_TITLE | SOURCE | PERSON | LUPDATE | TRANSFER | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 608242 | 6/22/1996 | 6 | 1996 | NaN | NaN | KSMF | SACRAMENTO INTL | 38.69542 | -121.59077 | ... | Medium | NaN | NaN | /Legacy Record=100001/ | REDACTED | REDACTED | Air Transport Report | Air Transport Operations | 12/20/2007 | False |
| 1 | 608243 | 6/26/1996 | 6 | 1996 | NaN | NaN | KDEN | DENVER INTL AIRPORT | 39.85841 | -104.66700 | ... | Medium | NaN | NaN | /Legacy Record=100002/ | REDACTED | REDACTED | Air Transport Report | Air Transport Operations | 12/20/2007 | False |
| 2 | 608244 | 7/1/1996 | 7 | 1996 | NaN | NaN | KOMA | EPPLEY AIRFIELD | 41.30252 | -95.89417 | ... | Medium | NaN | NaN | /Legacy Record=100003/ | REDACTED | REDACTED | Air Transport Report | Air Transport Operations | 12/20/2007 | False |
| 3 | 608245 | 7/1/1996 | 7 | 1996 | NaN | NaN | KIAD | WASHINGTON DULLES INTL ARPT | 38.94453 | -77.45581 | ... | Medium | NaN | NaN | /Legacy Record=100004/ | REDACTED | REDACTED | Air Transport Report | Air Transport Operations | 12/20/2007 | False |
| 4 | 608246 | 7/1/1996 | 7 | 1996 | NaN | NaN | KLGA | LA GUARDIA ARPT | 40.77724 | -73.87261 | ... | Medium | NaN | NaN | /Legacy Record=100005/ | REDACTED | REDACTED | Air Transport Report | Air Transport Operations | 12/20/2007 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 288805 | 1472253 | 8/28/2023 | 8 | 2023 | 21:50 | NaN | ZZZZ | UNKNOWN | NaN | NaN | ... | Small | NaN | NaN | NaN | REDACTED | REDACTED | FAA Form 5200-7-E | Airport Operations | 11/17/2023 | False |
| 288806 | 1472256 | 8/28/2023 | 8 | 2023 | 20:40 | NaN | ZZZZ | UNKNOWN | NaN | NaN | ... | Small | NaN | NaN | NaN | REDACTED | REDACTED | FAA Form 5200-7-E | Airport Operations | 11/17/2023 | False |
| 288807 | 1472257 | 8/28/2023 | 8 | 2023 | 07:14 | Dusk | KDTW | DETROIT METRO WAYNE COUNTY ARPT | 42.21206 | -83.34884 | ... | Small | NaN | NaN | NaN | REDACTED | REDACTED | FAA Form 5200-7-E | Airport Operations | 11/17/2023 | False |
| 288808 | 1472258 | 8/28/2023 | 8 | 2023 | 09:00 | Day | KPIH | POCATELLO REGIONAL ARPT | 42.91131 | -112.59586 | ... | Medium | NaN | NaN | NaN | REDACTED | REDACTED | FAA Form 5200-7-E | Airport Operations | 11/17/2023 | False |
| 288809 | 1472259 | 8/28/2023 | 8 | 2023 | 10:15 | NaN | KAMA | RICK HUSBAND AMARILLO INTL | 35.21937 | -101.70593 | ... | Small | NaN | NaN | NaN | REDACTED | REDACTED | FAA Form 5200-7-E | Carcass Found | 11/17/2023 | False |
288810 rows × 100 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 288810 entries, 0 to 288809 Data columns (total 100 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INDEX_NR 288810 non-null int64 1 INCIDENT_DATE 288810 non-null object 2 INCIDENT_MONTH 288810 non-null int64 3 INCIDENT_YEAR 288810 non-null int64 4 TIME 163633 non-null object 5 TIME_OF_DAY 166394 non-null object 6 AIRPORT_ID 288804 non-null object 7 AIRPORT 288810 non-null object 8 LATITUDE 253309 non-null float64 9 LONGITUDE 253308 non-null float64 10 RUNWAY 218852 non-null object 11 STATE 253309 non-null object 12 FAAREGION 253309 non-null object 13 LOCATION 33939 non-null object 14 ENROUTE_STATE 5037 non-null object 15 OPID 288784 non-null object 16 OPERATOR 288810 non-null object 17 REG 172543 non-null object 18 FLT 129249 non-null object 19 AIRCRAFT 288810 non-null object 20 AMA 205475 non-null object 21 AMO 190159 non-null object 22 EMA 192151 non-null float64 23 EMO 186756 non-null float64 24 AC_CLASS 206893 non-null object 25 AC_MASS 206490 non-null float64 26 TYPE_ENG 206044 non-null object 27 NUM_ENGS 206259 non-null float64 28 ENG_1_POS 205481 non-null float64 29 ENG_2_POS 191612 non-null float64 30 ENG_3_POS 13080 non-null float64 31 ENG_4_POS 3290 non-null float64 32 PHASE_OF_FLIGHT 177843 non-null object 33 HEIGHT 148807 non-null float64 34 SPEED 94237 non-null float64 35 DISTANCE 188234 non-null float64 36 SKY 139382 non-null object 37 PRECIPITATION 11175 non-null object 38 AOS 14196 non-null float64 39 COST_REPAIRS 5040 non-null object 40 COST_OTHER 4760 non-null object 41 COST_REPAIRS_INFL_ADJ 5040 non-null object 42 COST_OTHER_INFL_ADJ 4760 non-null object 43 INGESTED_OTHER 288810 non-null bool 44 INDICATED_DAMAGE 288810 non-null bool 45 DAMAGE_LEVEL 187315 non-null object 46 STR_RAD 288810 non-null bool 47 DAM_RAD 288810 non-null bool 48 STR_WINDSHLD 288810 non-null bool 49 DAM_WINDSHLD 288810 non-null bool 50 STR_NOSE 288810 non-null bool 51 DAM_NOSE 288810 non-null bool 52 STR_ENG1 288810 non-null bool 53 DAM_ENG1 288810 non-null bool 54 ING_ENG1 288810 non-null bool 55 STR_ENG2 288810 non-null bool 56 DAM_ENG2 288810 non-null bool 57 ING_ENG2 288810 non-null bool 58 STR_ENG3 288810 non-null bool 59 DAM_ENG3 288810 non-null bool 60 ING_ENG3 288810 non-null bool 61 STR_ENG4 288810 non-null bool 62 DAM_ENG4 288810 non-null bool 63 ING_ENG4 288810 non-null bool 64 STR_PROP 288810 non-null bool 65 DAM_PROP 288810 non-null bool 66 STR_WING_ROT 288810 non-null bool 67 DAM_WING_ROT 288810 non-null bool 68 STR_FUSE 288810 non-null bool 69 DAM_FUSE 288810 non-null bool 70 STR_LG 288810 non-null bool 71 DAM_LG 288810 non-null bool 72 STR_TAIL 288810 non-null bool 73 DAM_TAIL 288810 non-null bool 74 STR_LGHTS 288810 non-null bool 75 DAM_LGHTS 288810 non-null bool 76 STR_OTHER 288810 non-null bool 77 DAM_OTHER 288810 non-null bool 78 OTHER_SPECIFY 38462 non-null object 79 EFFECT 13718 non-null object 80 EFFECT_OTHER 2221 non-null object 81 BIRD_BAND_NUMBER 390 non-null float64 82 SPECIES_ID 288806 non-null object 83 SPECIES 288809 non-null object 84 REMARKS 264305 non-null object 85 REMAINS_COLLECTED 288810 non-null bool 86 REMAINS_SENT 288810 non-null bool 87 WARNED 288810 non-null object 88 NUM_SEEN 94773 non-null object 89 NUM_STRUCK 288174 non-null object 90 SIZE 263538 non-null object 91 NR_INJURIES 276 non-null float64 92 NR_FATALITIES 24 non-null float64 93 COMMENTS 218834 non-null object 94 REPORTED_NAME 288810 non-null object 95 REPORTED_TITLE 288810 non-null object 96 SOURCE 288810 non-null object 97 PERSON 267676 non-null object 98 LUPDATE 288810 non-null object 99 TRANSFER 288810 non-null bool dtypes: bool(37), float64(17), int64(3), object(43) memory usage: 149.0+ MB
df.describe()
| INDEX_NR | INCIDENT_MONTH | INCIDENT_YEAR | LATITUDE | LONGITUDE | EMA | EMO | AC_MASS | NUM_ENGS | ENG_1_POS | ENG_2_POS | ENG_3_POS | ENG_4_POS | HEIGHT | SPEED | DISTANCE | AOS | BIRD_BAND_NUMBER | NR_INJURIES | NR_FATALITIES | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2.888100e+05 | 288810.000000 | 288810.000000 | 2.533090e+05 | 253308.000000 | 192151.000000 | 186756.000000 | 206490.000000 | 206259.000000 | 205481.000000 | 191612.000000 | 13080.000000 | 3290.000000 | 148807.000000 | 94237.000000 | 188234.000000 | 14196.000000 | 3.900000e+02 | 276.000000 | 24.000000 |
| mean | 8.736693e+05 | 7.175974 | 2011.697798 | 2.012754e+02 | -90.880247 | 19.755812 | 8.522377 | 3.538312 | 2.013929 | 2.814956 | 2.667536 | 2.900306 | 2.043161 | 865.960358 | 142.585057 | 0.814466 | 91.454416 | 8.811022e+07 | 1.293478 | 2.041667 |
| std | 2.526448e+05 | 2.765082 | 8.432311 | 8.177522e+04 | 327.243918 | 10.724278 | 12.866405 | 0.867312 | 0.420133 | 2.113806 | 1.981032 | 1.950649 | 1.435114 | 1843.098801 | 46.650050 | 3.547950 | 661.074472 | 9.110186e+07 | 0.756134 | 1.654484 |
| min | 6.082420e+05 | 1.000000 | 1990.000000 | -3.767333e+01 | -177.381000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000 | 1.000000 |
| 25% | 6.893202e+05 | 5.000000 | 2006.000000 | 3.298764e+01 | -98.469780 | 10.000000 | 1.000000 | 3.000000 | 2.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 120.000000 | 0.000000 | 1.000000 | 0.000000e+00 | 1.000000 | 1.000000 |
| 50% | 7.646935e+05 | 7.000000 | 2014.000000 | 3.880581e+01 | -87.904460 | 22.000000 | 4.000000 | 4.000000 | 2.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 50.000000 | 140.000000 | 0.000000 | 3.000000 | 7.897730e+07 | 1.000000 | 1.500000 |
| 75% | 1.064714e+06 | 9.000000 | 2019.000000 | 4.085010e+01 | -80.417940 | 31.000000 | 10.000000 | 4.000000 | 2.000000 | 5.000000 | 5.000000 | 5.000000 | 4.000000 | 900.000000 | 160.000000 | 0.000000 | 24.000000 | 1.893343e+08 | 1.000000 | 2.000000 |
| max | 1.472259e+06 | 12.000000 | 2023.000000 | 4.115443e+07 | 164140.000000 | 92.000000 | 2301.000000 | 5.000000 | 4.000000 | 7.000000 | 7.000000 | 6.000000 | 5.000000 | 31300.000000 | 1250.000000 | 99.000000 | 62848.000000 | 2.821218e+08 | 7.000000 | 8.000000 |
# Dropping duplicate data
df.drop_duplicates(inplace=True)
df.shape
(288810, 100)
# Incidents by year
groupedYr_df = df.groupby('INCIDENT_YEAR').size().reset_index(name='Count')
groupedYr_df
| INCIDENT_YEAR | Count | |
|---|---|---|
| 0 | 1990 | 2120 |
| 1 | 1991 | 2516 |
| 2 | 1992 | 2651 |
| 3 | 1993 | 2624 |
| 4 | 1994 | 2708 |
| 5 | 1995 | 2825 |
| 6 | 1996 | 3031 |
| 7 | 1997 | 3559 |
| 8 | 1998 | 3809 |
| 9 | 1999 | 5120 |
| 10 | 2000 | 6024 |
| 11 | 2001 | 5825 |
| 12 | 2002 | 6222 |
| 13 | 2003 | 5992 |
| 14 | 2004 | 6606 |
| 15 | 2005 | 7270 |
| 16 | 2006 | 7295 |
| 17 | 2007 | 7751 |
| 18 | 2008 | 7638 |
| 19 | 2009 | 9509 |
| 20 | 2010 | 9898 |
| 21 | 2011 | 10110 |
| 22 | 2012 | 10932 |
| 23 | 2013 | 11410 |
| 24 | 2014 | 13692 |
| 25 | 2015 | 13772 |
| 26 | 2016 | 13323 |
| 27 | 2017 | 14776 |
| 28 | 2018 | 16202 |
| 29 | 2019 | 17341 |
| 30 | 2020 | 11620 |
| 31 | 2021 | 15638 |
| 32 | 2022 | 17226 |
| 33 | 2023 | 11775 |
sns.set(rc={'figure.figsize':(20,15)})
#sns.set_style("ticks")
sns.set(rc={'axes.facecolor':'#000000', 'figure.facecolor':'#EBEBEB'})
sns.barplot(groupedYr_df, x="INCIDENT_YEAR", y="Count")
#plt.show(bar_plot)
C:\Users\KPMal\anaconda3\envs\Py3-9\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\Users\KPMal\anaconda3\envs\Py3-9\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\Users\KPMal\anaconda3\envs\Py3-9\lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector):
<Axes: xlabel='INCIDENT_YEAR', ylabel='Count'>
fig = go.Figure(data=go.Scattergeo(
lon = df['LONGITUDE'],
lat = df['LATITUDE'],
text = df['INCIDENT_YEAR'],
mode = 'markers',
marker_color = df['NR_FATALITIES'],
))
fig.update_layout(
title = 'U.S. Strike Locations',
geo_scope='usa',
)
fig.show()